#include <stdio.h>
#include <string.h>
#include <string>
#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
#include <cybozu/inttype.hpp>
#include <cybozu/test.hpp>
#include <algorithm>

using namespace Xbyak;

// Encoding test for packed floating-point instructions whose last register
// operand is a ymm/xmm register combined with a T_rn_sae or T_sae modifier.
// The instructions are emitted once each by Code's constructor and the
// resulting bytes are compared against the expected table below.
CYBOZU_TEST_AUTO(ymm_with_sae)
{
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			// NOTE: the emission order here must stay in sync with the byte
			// order of tbl below; the table is a plain concatenation of the
			// expected encodings with no per-instruction markers.
			vaddpd(ymm1, ymm2, ymm3 |T_rn_sae);
			vaddph(ymm1, ymm2, ymm3 |T_rn_sae);
			vaddps(ymm1, ymm2, ymm3 |T_rn_sae);
			vcmppd(k1, ymm2, ymm3 |T_sae, 3);
			vcmpph(k1, ymm2, ymm3 |T_sae, 3);
			vcmpps(k1, ymm2, ymm3 |T_sae, 3);
			vcvtdq2ph(xmm1, ymm2 |T_rn_sae);
			vcvtdq2ps(ymm1, ymm2 |T_rn_sae);
			vcvtpd2dq(xmm1, ymm2 |T_rn_sae);
			vcvtpd2ph(xmm1, ymm2 |T_rn_sae);
			vcvtpd2ps(xmm1, ymm2 |T_rn_sae);
			vcvtpd2qq(ymm1, ymm2 |T_rn_sae);
			vcvtpd2udq(xmm1, ymm2 |T_rn_sae);
			vcvtpd2uqq(ymm1, ymm2 |T_rn_sae);
			vcvtph2dq(ymm1, xmm2 |T_rn_sae);
			vcvtph2pd(ymm1, xmm2 |T_sae);
			vcvtph2ps(ymm1, xmm2 |T_sae);
			vcvtph2psx(ymm1, xmm2 |T_sae);
			vcvtph2qq(ymm1, xmm2 |T_rn_sae);
			vcvtph2udq(ymm1, xmm2 |T_rn_sae);
			vcvtph2uqq(ymm1, xmm2 |T_rn_sae);
			vcvtph2uw(ymm1, ymm2 |T_rn_sae);
			vcvtph2w(ymm1, ymm2 |T_rn_sae);
			vcvtps2dq(ymm1, ymm2 |T_rn_sae);
			vcvtps2pd(ymm1, xmm2 |T_sae);
			vcvtps2ph(xmm1, ymm2 |T_sae, 3);
			vcvtps2phx(xmm1, ymm2 |T_rn_sae);
			vcvtps2qq(ymm1, xmm2 |T_rn_sae);
			vcvtps2udq(ymm1, ymm2 |T_rn_sae);
			vcvtps2uqq(ymm1, xmm2 |T_rn_sae);
			vcvtqq2pd(ymm1, ymm2 |T_rn_sae);
			vcvtqq2ph(xmm1, ymm2 |T_rn_sae);
			vcvtqq2ps(xmm1, ymm2 |T_rn_sae);
			vcvttpd2dq(xmm1, ymm2 |T_sae);
			vcvttpd2qq(ymm1, ymm2 |T_sae);
			vcvttpd2udq(xmm1, ymm2 |T_sae);
			vcvttpd2uqq(ymm1, ymm2 |T_sae);
			vcvttph2dq(ymm1, xmm2 |T_sae);
			vcvttph2qq(ymm1, xmm2 |T_sae);
			vcvttph2udq(ymm1, xmm2 |T_sae);
			vcvttph2uqq(ymm1, xmm2 |T_sae);
			vcvttph2uw(ymm1, ymm2 |T_sae);
			vcvttph2w(ymm1, ymm2 |T_sae);
			vcvttps2dq(ymm1, ymm2 |T_sae);
			vcvttps2qq(ymm1, xmm2 |T_sae);
			vcvttps2udq(ymm1, ymm2 |T_sae);
			vcvttps2uqq(ymm1, xmm2 |T_sae);
			vcvtudq2ph(xmm1, ymm2 |T_rn_sae);
			vcvtudq2ps(ymm1, ymm2 |T_rn_sae);
			vcvtuqq2pd(ymm1, ymm2 |T_rn_sae);
			vcvtuqq2ph(xmm1, ymm2 |T_rn_sae);
			vcvtuqq2ps(xmm1, ymm2 |T_rn_sae);
			vcvtuw2ph(ymm1, ymm2 |T_rn_sae);
			vcvtw2ph(ymm1, ymm2 |T_rn_sae);
			vdivpd(ymm1, ymm2, ymm3 |T_rn_sae);
			vdivph(ymm1, ymm2, ymm3 |T_rn_sae);
			vdivps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfcmaddcph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfcmulcph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfixupimmpd(ymm1, ymm2, ymm3 |T_sae, 3);
			vfixupimmps(ymm1, ymm2, ymm3 |T_sae, 3);
			vfmadd132pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmadd132ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmadd132ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmadd213pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmadd213ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmadd213ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmadd231pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmadd231ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmadd231ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmaddcph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmaddsub132pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmaddsub132ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmaddsub132ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmaddsub213pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmaddsub213ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmaddsub213ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmaddsub231pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmaddsub231ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmaddsub231ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsub132pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsub132ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsub132ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsub213pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsub213ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsub213ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsub231pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsub231ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsub231ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsubadd132pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsubadd132ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsubadd132ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsubadd213pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsubadd213ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsubadd213ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsubadd231pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsubadd231ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsubadd231ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmulcph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmadd132pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmadd132ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmadd132ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmadd213pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmadd213ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmadd213ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmadd231pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmadd231ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmadd231ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmsub132pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmsub132ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmsub132ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmsub213pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmsub213ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmsub213ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmsub231pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmsub231ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmsub231ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vgetexppd(ymm1, ymm2 |T_sae);
			vgetexpph(ymm1, ymm2 |T_sae);
			vgetexpps(ymm1, ymm2 |T_sae);
			vgetmantpd(ymm1, ymm2 |T_sae, 3);
			vgetmantph(ymm1, ymm2 |T_sae, 3);
			vgetmantps(ymm1, ymm2 |T_sae, 3);
			vmaxpd(ymm1, ymm2, ymm3 |T_sae);
			vmaxph(ymm1, ymm2, ymm3 |T_sae);
			vmaxps(ymm1, ymm2, ymm3 |T_sae);
			vminpd(ymm1, ymm2, ymm3 |T_sae);
			vminph(ymm1, ymm2, ymm3 |T_sae);
			vminps(ymm1, ymm2, ymm3 |T_sae);
			vmulpd(ymm1, ymm2, ymm3 |T_rn_sae);
			vmulph(ymm1, ymm2, ymm3 |T_rn_sae);
			vmulps(ymm1, ymm2, ymm3 |T_rn_sae);
			vrangepd(ymm1, ymm2, ymm3 |T_sae, 3);
			vrangeps(ymm1, ymm2, ymm3 |T_sae, 3);
			vreducepd(ymm1, ymm2 |T_sae, 3);
			vreduceph(ymm1, ymm2 |T_sae, 3);
			vreduceps(ymm1, ymm2 |T_sae, 3);
			vrndscalepd(ymm1, ymm2 |T_sae, 3);
			vrndscaleph(ymm1, ymm2 |T_sae, 3);
			vrndscaleps(ymm1, ymm2 |T_sae, 3);
			vscalefpd(ymm1, ymm2, ymm3 |T_rn_sae);
			vscalefph(ymm1, ymm2, ymm3 |T_rn_sae);
			vscalefps(ymm1, ymm2, ymm3 |T_rn_sae);
			vsqrtpd(ymm1, ymm2 |T_rn_sae);
			vsqrtph(ymm1, ymm2 |T_rn_sae);
			vsqrtps(ymm1, ymm2 |T_rn_sae);
			vsubpd(ymm1, ymm2, ymm3 |T_rn_sae);
			vsubph(ymm1, ymm2, ymm3 |T_rn_sae);
			vsubps(ymm1, ymm2, ymm3 |T_rn_sae);
		}
	} c;
	// Expected machine code for the instructions above, concatenated in
	// emission order. Rows are wrapped at 16 bytes, so row boundaries do NOT
	// coincide with instruction boundaries. Every encoding starts with 0x62;
	// the ones emitted with a trailing immediate argument (3) end in 0x03.
	const uint8_t tbl[] = {
		0x62, 0xf1, 0xe9, 0x18, 0x58, 0xcb, 0x62, 0xf5, 0x68, 0x18, 0x58, 0xcb, 0x62, 0xf1, 0x68, 0x18,
		0x58, 0xcb, 0x62, 0xf1, 0xe9, 0x18, 0xc2, 0xcb, 0x03, 0x62, 0xf3, 0x68, 0x18, 0xc2, 0xcb, 0x03,
		0x62, 0xf1, 0x68, 0x18, 0xc2, 0xcb, 0x03, 0x62, 0xf5, 0x78, 0x18, 0x5b, 0xca, 0x62, 0xf1, 0x78,
		0x18, 0x5b, 0xca, 0x62, 0xf1, 0xfb, 0x18, 0xe6, 0xca, 0x62, 0xf5, 0xf9, 0x18, 0x5a, 0xca, 0x62,
		0xf1, 0xf9, 0x18, 0x5a, 0xca, 0x62, 0xf1, 0xf9, 0x18, 0x7b, 0xca, 0x62, 0xf1, 0xf8, 0x18, 0x79,
		0xca, 0x62, 0xf1, 0xf9, 0x18, 0x79, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x5b, 0xca, 0x62, 0xf5, 0x78,
		0x18, 0x5a, 0xca, 0x62, 0xf2, 0x79, 0x18, 0x13, 0xca, 0x62, 0xf6, 0x79, 0x18, 0x13, 0xca, 0x62,
		0xf5, 0x79, 0x18, 0x7b, 0xca, 0x62, 0xf5, 0x78, 0x18, 0x79, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x79,
		0xca, 0x62, 0xf5, 0x78, 0x18, 0x7d, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x7d, 0xca, 0x62, 0xf1, 0x79,
		0x18, 0x5b, 0xca, 0x62, 0xf1, 0x78, 0x18, 0x5a, 0xca, 0x62, 0xf3, 0x79, 0x18, 0x1d, 0xd1, 0x03,
		0x62, 0xf5, 0x79, 0x18, 0x1d, 0xca, 0x62, 0xf1, 0x79, 0x18, 0x7b, 0xca, 0x62, 0xf1, 0x78, 0x18,
		0x79, 0xca, 0x62, 0xf1, 0x79, 0x18, 0x79, 0xca, 0x62, 0xf1, 0xfa, 0x18, 0xe6, 0xca, 0x62, 0xf5,
		0xf8, 0x18, 0x5b, 0xca, 0x62, 0xf1, 0xf8, 0x18, 0x5b, 0xca, 0x62, 0xf1, 0xf9, 0x18, 0xe6, 0xca,
		0x62, 0xf1, 0xf9, 0x18, 0x7a, 0xca, 0x62, 0xf1, 0xf8, 0x18, 0x78, 0xca, 0x62, 0xf1, 0xf9, 0x18,
		0x78, 0xca, 0x62, 0xf5, 0x7a, 0x18, 0x5b, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x7a, 0xca, 0x62, 0xf5,
		0x78, 0x18, 0x78, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x78, 0xca, 0x62, 0xf5, 0x78, 0x18, 0x7c, 0xca,
		0x62, 0xf5, 0x79, 0x18, 0x7c, 0xca, 0x62, 0xf1, 0x7a, 0x18, 0x5b, 0xca, 0x62, 0xf1, 0x79, 0x18,
		0x7a, 0xca, 0x62, 0xf1, 0x78, 0x18, 0x78, 0xca, 0x62, 0xf1, 0x79, 0x18, 0x78, 0xca, 0x62, 0xf5,
		0x7b, 0x18, 0x7a, 0xca, 0x62, 0xf1, 0x7b, 0x18, 0x7a, 0xca, 0x62, 0xf1, 0xfa, 0x18, 0x7a, 0xca,
		0x62, 0xf5, 0xfb, 0x18, 0x7a, 0xca, 0x62, 0xf1, 0xfb, 0x18, 0x7a, 0xca, 0x62, 0xf5, 0x7b, 0x18,
		0x7d, 0xca, 0x62, 0xf5, 0x7a, 0x18, 0x7d, 0xca, 0x62, 0xf1, 0xe9, 0x18, 0x5e, 0xcb, 0x62, 0xf5,
		0x68, 0x18, 0x5e, 0xcb, 0x62, 0xf1, 0x68, 0x18, 0x5e, 0xcb, 0x62, 0xf6, 0x6b, 0x18, 0x56, 0xcb,
		0x62, 0xf6, 0x6b, 0x18, 0xd6, 0xcb, 0x62, 0xf3, 0xe9, 0x18, 0x54, 0xcb, 0x03, 0x62, 0xf3, 0x69,
		0x18, 0x54, 0xcb, 0x03, 0x62, 0xf2, 0xe9, 0x18, 0x98, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0x98, 0xcb,
		0x62, 0xf2, 0x69, 0x18, 0x98, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xa8, 0xcb, 0x62, 0xf6, 0x69, 0x18,
		0xa8, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xa8, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xb8, 0xcb, 0x62, 0xf6,
		0x69, 0x18, 0xb8, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xb8, 0xcb, 0x62, 0xf6, 0x6a, 0x18, 0x56, 0xcb,
		0x62, 0xf2, 0xe9, 0x18, 0x96, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0x96, 0xcb, 0x62, 0xf2, 0x69, 0x18,
		0x96, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xa6, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xa6, 0xcb, 0x62, 0xf2,
		0x69, 0x18, 0xa6, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xb6, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xb6, 0xcb,
		0x62, 0xf2, 0x69, 0x18, 0xb6, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0x9a, 0xcb, 0x62, 0xf6, 0x69, 0x18,
		0x9a, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x9a, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xaa, 0xcb, 0x62, 0xf6,
		0x69, 0x18, 0xaa, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xaa, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xba, 0xcb,
		0x62, 0xf6, 0x69, 0x18, 0xba, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xba, 0xcb, 0x62, 0xf2, 0xe9, 0x18,
		0x97, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0x97, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x97, 0xcb, 0x62, 0xf2,
		0xe9, 0x18, 0xa7, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xa7, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xa7, 0xcb,
		0x62, 0xf2, 0xe9, 0x18, 0xb7, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xb7, 0xcb, 0x62, 0xf2, 0x69, 0x18,
		0xb7, 0xcb, 0x62, 0xf6, 0x6a, 0x18, 0xd6, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0x9c, 0xcb, 0x62, 0xf6,
		0x69, 0x18, 0x9c, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x9c, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xac, 0xcb,
		0x62, 0xf6, 0x69, 0x18, 0xac, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xac, 0xcb, 0x62, 0xf2, 0xe9, 0x18,
		0xbc, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xbc, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xbc, 0xcb, 0x62, 0xf2,
		0xe9, 0x18, 0x9e, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0x9e, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x9e, 0xcb,
		0x62, 0xf2, 0xe9, 0x18, 0xae, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xae, 0xcb, 0x62, 0xf2, 0x69, 0x18,
		0xae, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xbe, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xbe, 0xcb, 0x62, 0xf2,
		0x69, 0x18, 0xbe, 0xcb, 0x62, 0xf2, 0xf9, 0x18, 0x42, 0xca, 0x62, 0xf6, 0x79, 0x18, 0x42, 0xca,
		0x62, 0xf2, 0x79, 0x18, 0x42, 0xca, 0x62, 0xf3, 0xf9, 0x18, 0x26, 0xca, 0x03, 0x62, 0xf3, 0x78,
		0x18, 0x26, 0xca, 0x03, 0x62, 0xf3, 0x79, 0x18, 0x26, 0xca, 0x03, 0x62, 0xf1, 0xe9, 0x18, 0x5f,
		0xcb, 0x62, 0xf5, 0x68, 0x18, 0x5f, 0xcb, 0x62, 0xf1, 0x68, 0x18, 0x5f, 0xcb, 0x62, 0xf1, 0xe9,
		0x18, 0x5d, 0xcb, 0x62, 0xf5, 0x68, 0x18, 0x5d, 0xcb, 0x62, 0xf1, 0x68, 0x18, 0x5d, 0xcb, 0x62,
		0xf1, 0xe9, 0x18, 0x59, 0xcb, 0x62, 0xf5, 0x68, 0x18, 0x59, 0xcb, 0x62, 0xf1, 0x68, 0x18, 0x59,
		0xcb, 0x62, 0xf3, 0xe9, 0x18, 0x50, 0xcb, 0x03, 0x62, 0xf3, 0x69, 0x18, 0x50, 0xcb, 0x03, 0x62,
		0xf3, 0xf9, 0x18, 0x56, 0xca, 0x03, 0x62, 0xf3, 0x78, 0x18, 0x56, 0xca, 0x03, 0x62, 0xf3, 0x79,
		0x18, 0x56, 0xca, 0x03, 0x62, 0xf3, 0xf9, 0x18, 0x09, 0xca, 0x03, 0x62, 0xf3, 0x78, 0x18, 0x08,
		0xca, 0x03, 0x62, 0xf3, 0x79, 0x18, 0x08, 0xca, 0x03, 0x62, 0xf2, 0xe9, 0x18, 0x2c, 0xcb, 0x62,
		0xf6, 0x69, 0x18, 0x2c, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x2c, 0xcb, 0x62, 0xf1, 0xf9, 0x18, 0x51,
		0xca, 0x62, 0xf5, 0x78, 0x18, 0x51, 0xca, 0x62, 0xf1, 0x78, 0x18, 0x51, 0xca, 0x62, 0xf1, 0xe9,
		0x18, 0x5c, 0xcb, 0x62, 0xf5, 0x68, 0x18, 0x5c, 0xcb, 0x62, 0xf1, 0x68, 0x18, 0x5c, 0xcb,
	};
	// Number of expected bytes.
	const size_t n = sizeof(tbl) / sizeof(tbl[0]);
	// The generated code must have exactly the expected length ...
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	// ... and match the expected table byte for byte.
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}

// Encoding-selection test for vmpsadbw.
// Two forms are emitted after setDefaultEncodingAVX10() is called with its
// default argument, and two more after it is called with AVX10v2Encoding.
// The resulting bytes are then compared against the expected table.
CYBOZU_TEST_AUTO(vmpsadbw)
{
	// Drive a plain generator object directly instead of emitting from the
	// constructor of a derived struct; the emitted byte stream is the same.
	Xbyak::CodeGenerator c;

	// Phase 1: default AVX10 encoding preference.
	c.setDefaultEncodingAVX10();
	c.vmpsadbw(c.xm1, c.xm3, c.xm15, 3); // vex(avx)
	c.vmpsadbw(c.ym1, c.ym3, c.ptr[c.rax + 128], 3); // vex(avx2)

	// Phase 2: AVX10.2 encoding preference.
	c.setDefaultEncodingAVX10(AVX10v2Encoding);
	c.vmpsadbw(c.ym1, c.ym3, c.ym15, 3); // evex(avx10.2)
	c.vmpsadbw(c.ym1, c.ym3, c.ptr[c.rax + 128], 3); // evex(avx10.2)

	// Expected machine code, one emitted instruction per row, in emission order.
	const uint8_t tbl[] = {
		0xc4, 0xc3, 0x61, 0x42, 0xcf, 0x03,
		0xc4, 0xe3, 0x65, 0x42, 0x88, 0x80, 0x00, 0x00, 0x00, 0x03,
		0x62, 0xd3, 0x66, 0x28, 0x42, 0xcf, 0x03,
		0x62, 0xf3, 0x66, 0x28, 0x42, 0x48, 0x04, 0x03,
	};
	// uint8_t elements are one byte each, so sizeof(tbl) is the element count.
	const size_t n = sizeof(tbl);

	// Length first, then the byte-for-byte comparison.
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
